home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Magnum One
/
Magnum One (Mid-American Digital) (Disc Manufacturing).iso
/
d12
/
cgazv5n4.arc
/
METAPHON.C
< prev
next >
Wrap
C/C++ Source or Header
|
1991-09-23
|
7KB
|
193 lines
/*--- Listing 1 -------------------------- METAPHON.C -----------
*
* Author: Gary A. Parker
* Compilers: Turbo C++ 1.0, MSC 6.0
* Memory model: any
*
* This is an implementation of the Metaphone algorithm developed
* by Lawrence Phillips. like the Soundex algorithm, it compares
* words that sound alike but are spelled differently. Metaphone
* was designed to overcome difficulties encountered with Soundex
*
* Usage:
* The calling function must pass three arguments:
*
* char *Word - the word to be converted to a 'metaph'
* char *Metaph - a MAXMETAPH+1 byte field for the result
* int Flag - a flag
*
* If Flag is 1 then a 'metaph' will be computed for Word and
* stored in Metaph. If Flag is 0, then the function will compute
* a 'metaph' for Word and compare it with the 'metaph', passed in
* Metaph. It will return 0 for a match, else -1. The function
* will also return -1 if Word is 0 bytes long.
*
* This code is hereby placed in the public domain.
*--------------------------------------------------------------*/
#include <ctype.h>
#define MAXMETAPH 4
int metaphone(char *, char *, int);
/* Character coding array */
static char vsvfn[26] = {
1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0};
/* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z*/
/* Macros to access character coding array */
#define vowel(x) (vsvfn[(x) - 'A'] & 1) /* AEIOU */
#define same(x) (vsvfn[(x) - 'A'] & 2) /* FJLMNR */
#define varson(x) (vsvfn[(x) - 'A'] & 4) /* CGPST */
#define frontv(x) (vsvfn[(x) - 'A'] & 8) /* EIY */
#define noghf(x) (vsvfn[(x) - 'A'] & 16) /* BDH */
int metaphone(char *Word, char *Metaph, int Flag)
{
char *n, *n_start, *n_end; /* pointers to string */
char *metaph, *metaph_end; /* pointers to metaph */
char ntrans[32]; /* word with uppercase letters */
char newm[8]; /* new metaph for comparison */
int KSflag; /* state flag for X translation */
/*
** Copy Word to internal buffer, dropping non-alphabetic
** characters and converting to upper case.
*/
for(n = ntrans + 1, n_end = ntrans + 30;
*Word && n < n_end; Word++)
if(isalpha(*Word)) *n++ = toupper(*Word);
if(n == ntrans + 1) return -1; /* return if 0 bytes */
n_end = n; /* set n_end to end of string */
/* ntrans[0] will always be == 0 */
*n++ = 0; *n = 0; /* pad with nulls */
n = ntrans + 1; /* assign pointer to start */
/* if doing a comparison, redirect pointers */
if(!Flag) { metaph = Metaph; Metaph = newm; }
/* check for PN KN GN AE WR WH and X at start */
switch(*n) {
case 'P': case 'K': case 'G':
if(*(n + 1) == 'N') *n++ = 0;
break;
case 'A': if(*(n + 1) == 'E') *n++ = 0;
break;
case 'W': if(*(n + 1) == 'R') *n++ = 0;
else if(*(n + 1) == 'H') {
*(n + 1) = *n;
*n++ = 0;
}
break;
case 'X': *n = 'S'; break;
}
/*
** Now, loop step through string, stopping at end of string
** or when the computed 'metaph' is MAXMETAPH characters long
*/
KSflag = 0; /* state flag for KS translation */
for(metaph_end = Metaph + MAXMETAPH, n_start = n;
n <= n_end && Metaph < metaph_end; n++) {
if (KSflag) {
KSflag = 0;
*Metaph++ = *n;
}
else {
/* drop duplicates except for CC */
if(*(n - 1) == *n && *n != 'C') continue;
/* check for F J L M N R or first letter vowel */
if(same(*n) || (n == n_start && vowel(*n)))
*Metaph++ = *n;
else switch(*n) {
case 'B':
if(n < n_end || *(n - 1) != 'M')
*Metaph++ = *n;
break;
case 'C':
if(*(n - 1) != 'S' || !frontv(*(n + 1))) {
if(*(n + 1) == 'I' && *(n + 2) == 'A')
*Metaph++ = 'X';
else if(frontv(*(n + 1))) *Metaph++ = 'S';
else if(*(n + 1) == 'H')
*Metaph++ = ((n == n_start &&
!vowel(*(n + 2))) ||
*(n -1) == 'S')
? (char)'K' : (char)'X';
else *Metaph++ = 'K';
}
break;
case 'D':
*Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
? (char)'J' : (char)'T';
break;
case 'G':
if((*(n + 1) != 'H' || vowel(*(n + 2))) &&
(*(n + 1) != 'N' || ((n + 1) < n_end &&
(*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
(*(n - 1) != 'D' || !frontv(*(n + 1)) ) )
*Metaph++ = (frontv(*(n + 1)) &&
*(n + 2) != 'G')
? (char)'J' : (char)'K';
else if(*(n + 1) == 'H' && !noghf(*(n - 3))
&& *(n - 4) != 'H') *Metaph++ = 'F';
break;
case 'H':
if(!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
vowel(*(n + 1)))) *Metaph++ = 'H';
break;
case 'K':
if(*(n - 1) != 'C') *Metaph++ = 'K';
break;
case 'P':
*Metaph++ = *(n + 1) == 'H'
? (char)'F' : (char)'P';
break;
case 'Q':
*Metaph++ = 'K';
break;
case 'S':
*Metaph++ = (*(n + 1) == 'H' || (*(n + 1) == 'I'
&& (*(n + 2) == 'O' || *(n + 2) == 'A')))
? (char)'X' : (char)'S';
break;
case 'T':
if(*(n + 1) == 'I' && (*(n + 2) == 'O'
|| *(n + 2) == 'A') )
*Metaph++ = 'X';
else if(*(n + 1) == 'H') *Metaph++ = '0';
else if(*(n + 1) != 'C' || *(n + 2) != 'H')
*Metaph++ = 'T';
break;
case 'V': *Metaph++ = 'F'; break;
case 'W': case 'Y':
if(vowel(*(n + 1))) *Metaph++ = *n;
break;
case 'X':
if(n == n_start) *Metaph++ = 'S';
else {
*Metaph++ = 'K'; /* insert K, then S */
KSflag = 1; /* this flag will cause S to be
inserted on next pass thru loop */
}
break;
case 'Z': *Metaph++ = 'S'; break;
}
}
/* compare new metaph with old */
if(!Flag && *(Metaph-1) != metaph[(Metaph - newm) - 1])
return -1;
}
/* If comparing, check if metaphs were equal in length */
if(!Flag && metaph[Metaph - newm]) return -1;
*Metaph = 0; /* null terminate return value */
return 0;
}